from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import ComplementNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import Perceptron
from sklearn.neighbors._classification import RadiusNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from xgboost import  XGBRFClassifier

from sklearn.ensemble import BaggingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.discriminant_analysis import  LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC
from sklearn.svm import NuSVC
from sklearn.discriminant_analysis import  QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import NearestCentroid
from catboost import CatBoostClassifier
import lightgbm as lgb

from sklearn.metrics import plot_confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score, recall_score, precision_score,f1_score, auc, roc_curve, roc_auc_score, accuracy_score
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline



import data_Balancer as balancer
import load_Data as lData
import Model_Evaluater as M_eva
#classifiers imports
import Classifiers.AdaBoosting as adab
import Classifiers.BernoulliNB as bern
import Classifiers.ComplementNB as cnb
import Classifiers.DecisionTree as dt
import Classifiers.extraTrees as ext
import Classifiers.GradientBoosting as gboost
import Classifiers.KNN as knn
import Classifiers.LogisticRegression as lgr
import Classifiers.MLP as mlp
import Classifiers.NaiveBayes as nb
import Classifiers.PassiveAggressive as passiveA
import Classifiers.Perceptron as percep
import Classifiers.RadiusNeighbors as radNB
import Classifiers.RandomForest as rf
import Classifiers.RidgeClassifier as ridg
import Classifiers.SGDClassifier as sgd
import Classifiers.SVM as svm
import Classifiers.XGBoost as xgb
import Classifiers.XGBRF as xgbr
import Classifiers.Bagging as bagg
import Classifiers.GaussianProcess as gap
import Classifiers.LDA as lda
import Classifiers.LinearSVC as lsvc
import Classifiers.NuSVC as nsvc
import Classifiers.QDA as qda
import Classifiers.MultinomialNB as mnb
import  Classifiers.NearestCentroid as nc
import  Classifiers.CatBoost as cat
import Classifiers.LightGBoost as LGB

import matplotlib.pyplot as plt
import Confussion_matrix as cm

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def find_Null_Samples(inputdata):
    """Replace missing values with the mean of the column they belong to.

    Parameters
    ----------
    inputdata : array-like
        Tabular data; anything accepted by ``pandas.DataFrame``.

    Returns
    -------
    numpy.ndarray
        The values of the table after every missing entry has been filled
        with its column mean.
    """
    def _fill_with_column_mean(column):
        # The mean is computed per column, so each feature is imputed
        # independently of the others.
        return column.fillna(column.mean())

    frame = pd.DataFrame(inputdata)
    imputed = frame.apply(_fill_with_column_mean, axis=0)
    return imputed.values

def find_Remove_Outliers(input_data, threshold=3):
    """Drop every row that holds a z-score outlier in at least one column.

    For each column the z-score of every value is computed from the column's
    ``np.mean`` and ``np.std``. A row is removed when the absolute z-score of
    any of its values exceeds ``threshold``.

    Parameters
    ----------
    input_data : array-like
        Tabular data; anything accepted by ``pandas.DataFrame``.
    threshold : float, optional
        Maximum allowed absolute z-score. Defaults to 3, i.e. values further
        than three standard deviations from the column mean are outliers.

    Returns
    -------
    pandas.DataFrame
        The input without the outlier rows, re-indexed from 0.
    """
    frame = pd.DataFrame(input_data)
    outlier_labels = []
    # ``DataFrame.items`` replaces ``iteritems``, which pandas 2.0 removed.
    for _, column in frame.items():
        column_mean = np.mean(column)
        column_std = np.std(column)
        # A constant column has a zero standard deviation; its z-scores are
        # NaN, which never compare greater than the threshold, so nothing in
        # that column is flagged.
        z_scores = (column - column_mean) / column_std
        is_outlier = (np.abs(z_scores) > threshold).to_numpy(dtype=bool)
        outlier_labels.extend(column.index[is_outlier])
    # Rows are dropped by index label; a row flagged in several columns
    # simply appears more than once in the list.
    return frame.drop(index=outlier_labels).reset_index(drop=True)





def _tuning_functions():
    """Return the (name, tuning function) pairs in the order they are run.

    The name is used both as the label handed to ``M_eva.evaluate_model`` and
    as the prefix of the printed result line.
    """
    return [
        ('ExtraTree', ext.EtraTree_training_tuning),
        ('KNN', knn.KNN_training_tuning),
        ('LogisticRegression', lgr.LogisticRegression_training_tuning),
        ('LGB', LGB.LGB_training_tuning),
        ('Bagging', bagg.Bagging_training_tuning),
        ('LDA', lda.LDA_training_tuning),
        ('QDA', qda.QDA_training_tuning),
        ('CatBoostClassifier', cat.catB_training_tuning),
        ('BernoulliNB', bern.BernoulliNB_training_tuning),
        ('MLP', mlp.MLP_training_tuning),
        ('XGB', xgb.XGB_training_tuning),
        ('XGBRF', xgbr.XGBRF_training_tuning),
        ('DT', dt.DT_training_tuning),
        ('NB', nb.NB_training_tuning),
        ('Ridge', ridg.Ridge_training_tuning),
        ('SGD', sgd.SGD_training_tuning),
        ('Perceptron', percep.Perceptron_training_tuning),
        ('RandomForest', rf.RandomForest_training_tuning),
        ('PassiveAggressive', passiveA.PassiveAggressive_training_tuning),
        ('AdaBoost', adab.AdaBoost_training_tuning),
        ('GradientBoosting', gboost.GradientBoosting_training_tuning),
        ('GaussianProcess', gap.GaussianProcess_training_tuning),
        ('SVM', svm.Svm_training_tuning),
        ('LinearSVC', lsvc.LinearSVC_training_tuning),
        ('NearestCentroid', nc.NearestCentroid_training_tuning),
        ('NuSVC', nsvc.NuSVC_training_tuning),
        # NOTE: the original script carried the remark
        # "ValueError: Negative values in data passed to ComplementNB (input X)"
        # just before this entry.
        ('ComplementNB', cnb.ComplementNB_training_tuning),
        ('MultinomialNB', mnb.MultinomialNB_training_tuning),
    ]


def _evaluate_fold(x_train, y_train, x_test, y_test, dataset):
    """Tune, evaluate and report every classifier on one prepared fold.

    ``dataset`` is the tag (dataset name plus ``_balanced`` / ``_normal``)
    forwarded to the tuning and evaluation helpers.
    """
    for name, tune in _tuning_functions():
        model = tune(x_train, y_train, dataset)
        accuracy, kappa = M_eva.evaluate_model(
            x_train, y_train, x_test, y_test, model, name, dataset)
        print('%s  accuracy  %.3f  kappa %.3f ' % (name, accuracy, kappa))


def main():
    """Run the 10-fold evaluation of every classifier on every dataset.

    Each dataset is processed twice: once with the training folds balanced
    through ``balancer.data_balancing`` and once without balancing.
    """
    from sklearn.model_selection import KFold

    dataset_name_ = {
        'ourdataset': 'new_Data/ourdataset.csv',
        'fontana': 'new_Data/fontana.csv',
        'fabiano': 'new_Data/fabiano.csv',
        'feature_selection': 'new_Data/feature_selection.csv',
    }
    balanced_ = [True, False]

    for balanced in balanced_:
        for dataset_name, datapath in dataset_name_.items():
            # Load the data; the last column is treated as the label below.
            inputD = lData.load_data(datapath)

            # Replace the null values in the input data with the column mean.
            data = find_Null_Samples(inputD)

            # 10-fold cross validation. shuffle / random_state must be passed
            # by keyword: recent scikit-learn releases reject them positionally.
            kfold = KFold(n_splits=10, shuffle=True, random_state=1)
            for train, test in kfold.split(data):
                print('train: %s, test: %s' % (len(data[train]), len(data[test])))

                # Training data for this fold (features / label column).
                fold_train = data[train]
                x_train, y_train = fold_train[:, :-1], fold_train[:, -1]

                # Testing data for this fold.
                fold_test = data[test]
                x_test, y_test = fold_test[:, :-1], fold_test[:, -1]

                # Data balancing is applied to the training part only.
                if balanced:
                    x_train, y_train = balancer.data_balancing(x_train, y_train)
                    print('train balanced: %s, test: %s' % (len(x_train), len(x_test)))
                    dataset = dataset_name + '_balanced'
                else:
                    dataset = dataset_name + '_normal'

                # Normalise both parts with a scaler fitted on the training
                # data only.
                scaler = MinMaxScaler(feature_range=(0, 1)).fit(x_train)
                x_train = scaler.transform(x_train)
                x_test = scaler.transform(x_test)

                _evaluate_fold(x_train, y_train, x_test, y_test, dataset)


if __name__ == '__main__':
    main()

